#from lxml import etree
from lxml import html
etree = html.etree

# import xml.etree.ElementTree as etree  # not usable here: the stdlib module has no HTML() function

# Sample markup to parse. Note that the last <li> has no closing tag.
text = """
<div>
<ul>
<li class = "item-0"><a href = "link1.html">first item</a></li>
<li class = "item-1"><a href = "link2.html">second item</a></li>
<li class = "item-inactive"><a href = "link3.html">third item</a></li>
<li class = "item-1"><a href = "link4.html">fourth item</a></li>
<li class = "item-0"><a href = "link5.html">fif item</a>
</ul>
</div>
"""

# Divider line printed between the three outputs.
_SEPARATOR = "****************"

# Parse the markup into an element tree.
# NOTE: this rebinds the name `html`, which until now referred to the
# lxml.html module imported at the top of the file.
html = etree.HTML(text)
print(html)
print(_SEPARATOR)

# Serialize the tree back to bytes: tags are completed, and html and body
# nodes have been added.
result = etree.tostring(html)
print(result)
print(_SEPARATOR)

# Show the same serialization as text, decoded as UTF-8.
print(str(result, "utf-8"))